/* Set up the log*/
cd $ohie
cap log close
global sysdate: disp %tdYYNNDD  date("`c(current_date)'", "DMY")
qui log using 	"./logs/diff_in_diff_brfss_$sysdate.log", replace

* Set up timer
timer clear 1
timer on 1 

/*----------------------------------------------------------------------*/
/* PROGRAM: brfss_diff_diff						*/
/*									*/
/* PURPOSE:								*/
/* [*] This code computes the summary statistics, specifically the 	*/
/* averages and sample sizes, for certain available characteristics of 	*/
/* the BRFSS Massachussetts data. Unlike the OHIE summary statistics, 	*/
/* where we compute averages for the outcomes and the predicted outcomes,*/
/* we compute only averages for certain characteristics. We report these */
/* averages in the bottom panel of the "ohie_brfss_sumstats_new" table.	*/
/* The OHIE summary statistics reported in the top panel of this table 	*/
/* are output by the "ohie_sumstats.do" file				*/				
/*									*/
/* OUTPUT:								*/
/* [*]	diff_in_diff_brfss: This output file contains the  	*/
/*	estimate and bootstrapped significance stars reported in the 	*/
/* 	bottom panel of the "ohie_brfss_sumstats_new" table.		*/
/* [*] 	/logs/diff_in_diff_brfss_<sysdate>: The log file for this program.*/
/*----------------------------------------------------------------------*/

* Set up display options
clear
set type double
set more off, permanently

*************************************************
* VARIABLES AND CONTROLS	  		*
*************************************************

local indata "$final/brfss.dta"
	
* Endogenous variable
local D	"D"

* Instrument
local Z	"Z"

* Weight variable
local wt "w"

* Specify the variables for which to compute internal and external 
* validity tests
local statsvars "english female age selfhealth_fair"

* Number of bootstrap replications
* The first replication of the bootstrap process has no re-sampling; in other 
* words the first replication always runs on the full original non-bootstrapped 
* sample.
local reps = 1001

* Define the seeds for the variables other than the outcomes
foreach v of local statsvars {
	global `v'_seed = 6574360
}
		
* Output Excel file for bootstrapped statistics
local out_file "diff_in_diff_brfss"
	
*************************************************
* RUN CODE			  		*
*************************************************
	
* Start loop for variables here
foreach v of local statsvars {

	* Set the seed for each variable separately
	set seed $`v'_seed

	* Define a separate matrix for each covariate/predicted outcome/outcome 
	* as an output matrix
	matrix define boot_`v' = J(`reps', 30, .)	

	* Initialize the local macro for renaming variables in the matrix
	local renamevars ""
	
*************************************************
* RUN TESTS			  		*
*************************************************
	
	* Start bootstrap replications here
	forval rep = 1/`reps' {
		
		* Set up and re-sample
		use "`indata'", clear	
		g male = 1-female if !mi(female)
		g age_below_median = age<=41 if !mi(age)	// using median age from Oregon
		g age_above_median = age>41 if !mi(age)
		g non_english = 1-english if !mi(english)
		
		* Initialize the iterator for matrix output
		local matrixiter = 1
		
		* Drop observations where the variable is missing
		* We do this before bootstrapping in order to ensure that each 
		* bootstrapping sample has the same sample size. In addition, 
		* this ensures that we do not oversample individuals with 
		* missing values when bootstrapping.
		drop if `v'==.				
		local obs = _N
		
		* Sample observations by D
		qui count if `D'==1
		local N_T = `r(N)'
		
		qui count if `D'==0
		local N_U = `r(N)'
		
		* N
		qui count if `v'==1
		local N_x = `r(N)'
		
		* Re-sample for bootstrap
		qui if `rep' > 1 {
			bsample
		}
				
		* Mean for all
		qui su `v'  [aweight=`wt']
		local mean_all = `r(mean)'
		
		* Mean for always takers
		qui su `v' if `D'==1 & `Z'==0   [aweight=`wt']
		local mean_AT = `r(mean)'
		
		* Mean for always takers and treated compliers
		qui su `v' if `D'==1 & `Z'==1  [aweight=`wt']
		local mean_ATTC = `r(mean)'	
		
		* Mean for never takers and untreated compliers		
		qui su `v' if `D'==0 & `Z'==0   [aweight=`wt']
		local mean_NTUC = `r(mean)'
		
		* Mean for untreated
		qui su `v' if `D'==0   [aweight=`wt']
		local mean_UT = `r(mean)'
		
		* Mean for treated individuals	
		qui su `v' if `D'==0
		local mean_T = `r(mean)'
		
		* Never takers
		qui su `v' if `D'==0 & `Z'==1   [aweight=`wt']
		local mean_NT = `r(mean)'
		
		* pB is defined as P(D=1|Z=0)
		qui su `D' if `Z' == 0   [aweight=`wt']
		local pB = `r(mean)'
		
		* pI is defined as P(D=1|Z=1)
		qui su `D' if `Z' == 1   [aweight=`wt']
		local pI = `r(mean)'
		
		* Number of always takers and never takers
		local N_AT = _N*`pB'
		local N_NT = _N*(1-`pI')
		
		* pB_X and pI_X if binary
		local pB_X = .
		local pI_X = .
		qui distinct `v'
		if `r(ndistinct)'==2 {
			qui su `D' if `Z' == 0 & `v' == 1   [aweight=`wt']
			local pB_X = `r(mean)'
			
			qui su `D' if `Z' == 1 & `v' == 1   [aweight=`wt']
			local pI_X = `r(mean)'
		}
		
		* Define fraction of the sample who are (un)treated compliers, always takers, and never takers
		qui su `Z'  [aweight=`wt']
		local N_TC = (`pI'-`pB')*`r(mean)'*_N
		local N_UC = (`pI'-`pB')*(1-`r(mean)')*_N
		local N_C = `N_TC' + `N_UC'

		* Calculate the treated compliers average 
		local mean_TC = (1/(`pI' - `pB'))*(`pI'*`mean_ATTC' - `pB'*`mean_AT')

		* Calculate the untreated compliers average 
		local mean_UC = (1/(`pI' - `pB'))*((1-`pB')*`mean_NTUC' - (1-`pI')*`mean_NT')
		
		* Calculate the compliers average
		qui su `v'  [aweight=`wt']
		local mean_C = (1/(`pI'-`pB'))*(`r(mean)' - `pB'*`mean_AT' - (1-`pI')*`mean_NT')
		
		* Output
		matrix boot_`v'[`rep', `matrixiter'] = `N_x'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_x"
		
		matrix boot_`v'[`rep', `matrixiter'] = `pB'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' pB"
		
		matrix boot_`v'[`rep', `matrixiter'] = `pI'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' pI"
		
		matrix boot_`v'[`rep', `matrixiter'] = `pB_X'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' pB_X"
		
		matrix boot_`v'[`rep', `matrixiter'] = `pI_X'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' pI_X"
		
		matrix boot_`v'[`rep', `matrixiter'] = `=`pI'-`pB''
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' frac_c"
		
		matrix boot_`v'[`rep', `matrixiter'] = `=1-`pI''
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' frac_nt"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_all'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_all"
		
		matrix boot_`v'[`rep', `matrixiter'] = `obs'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_AT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_AT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_NT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_NT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_T'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_T"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_U'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_UT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_TC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_TC"
		
		matrix boot_`v'[`rep', `matrixiter'] = `N_UC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' N_UC"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_AT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_AT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_C'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_C"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_NT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_NT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_UC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_UC"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_TC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_TC"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_NTUC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_NTUC"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_UT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_UT"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_T'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' mean_T"

		matrix boot_`v'[`rep', `matrixiter'] = `mean_AT'-`mean_C'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' AT_C_Diff"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_C'-`mean_NT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' C_NT_Diff"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_AT'-`mean_NT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' AT_NT_Diff"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_TC'-`mean_UC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' TC_UC_Diff"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_AT'-`mean_TC'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' AT_TC_Diff"
		
		matrix boot_`v'[`rep', `matrixiter'] = `mean_UC'-`mean_NT'
		local ++matrixiter
		if `rep'==1 local renamevars "`renamevars' UC_NT_Diff"
				
	} // bootstrap loop ends here
		
	* Save the output matrix	
	svmat double boot_`v'
		
	* Delete extra variables and observations from the output matrix
	keep boot_`v'*
		
	gen obs_num = _n
	drop if obs_num>`reps'
	drop obs_num
				
	* Rename variables
	local i=1		
	foreach var of local renamevars {			
		rename boot_`v'`i' `var'
		local ++i
	}
	
	* Output the bootstrap statistics to Excel	
	bootstrap_statistics $output `out_file' `v'
}

timer off 1
timer list 1
local hours = `r(t1)'/3600
di "Computing time is `hours' hours"

cap qui log close
